

************************************************************
************************************************************
***          Marriage and Happiness                      ***
*** A Real Data Example: Marriage and Happiness (chap. V)***
***		    Josef Brüderl, Volker Ludwig                 *** 	   
***		          March 2015                             *** 	   
************************************************************
************************************************************

/* Data: SOEP 1984-2009 v26

We start with file "Happiness2.dta"
This file was prepared with "Happiness 2 DataPrep.do"

The lecture-package includes an anonymized version of this file:
A 50% random sample (on the person level), with artificial IDs,
and with a normally distributed random error (m=0, sd=10) added to
household income. Therefore, results are very similar, but not 
identical to the ones reported in the lecture. */


*************************************
** Preliminaries    *****************
*************************************
* Start from an empty workspace.
clear
* Fix the random-number seed so that the bootstrap standard errors computed
* further below can be replicated.
set seed 17838276

* Load data
* The directory below is the authors' own working directory -- adapt this path!
* A failing -cd- does not abort the run any more: Stata then simply stays in
* the current working directory, so the do-file also runs when it is started
* from the folder that contains "Happiness2.dta". If the file is not there
* either, -use- stops with Stata's usual "file not found" error.
capture cd  "C:\Users\bruederl\LRZ Sync+Share\Vorlesung PDA\Stata Beispiele\"
if _rc {
    display as text "Note: could not change to the authors' directory; staying in `c(pwd)'"
}
use "Happiness2.dta", clear

***** Declare data to be panel data *****
* id is the panel (person) identifier, year the time variable; the xt commands
* and the time-series operators (D., L., F.) used below rely on this setting.
xtset id year

****   New variables needed below       **********
* Derived variables used by the later sections of this do-file.
* NOTE(review): marry, yrsmarried, age and loghhinc come from "Happiness2.dta";
* their interpretation below follows the original inline comments -- confirm
* against the data-preparation do-file.
bysort id: egen treat = max(marry)        //person-level maximum of marry: indicator for treatment group
bysort id (year): gen pynr    = _n        //person-year ID: running number within person, in year order
bysort id:        gen pycount = _N        //# of person-years (rows) observed for the person
gen cohort = year - age                   //birth cohort (survey year minus age)
gen             h1 = year - yrsmarried    //current year or marriage year
bysort id: egen h2 = max(h1) if treat==1  //marriage year: person-level maximum of h1, filled only for the treatment group
gen  hhinc=exp(loghhinc)                  //HHincome in natural units (exponentiated log income)



***************************************************************
********    Some description of the data          *************
***************************************************************

* --- Outcome (happy) and central regressor (marry): one-way frequency tables
tab1 happy marry

* --- The 40 participation patterns that occur most often in the sample
xtdescribe, patterns(40)

* --- Length of the individual panels (one row per person: the first person-year)
tabulate pycount if pynr == 1

* --- Happiness trajectories of two selected persons in one graph
xtline happy if inlist(id, 602, 901), overlay ylabel(0(1)10, grid)

* --- Marriage indicator: overall / between / within tabulation
xttab marry

* --- Year-to-year transitions of the marriage indicator, with frequencies
xttrans marry, freq

* --- Size of the married (treatment) group by years since marriage
xttab yrsmarried if marry == 1

* --- Happiness in the control group (treat==0) and the treatment group (treat==1)
forvalues grp = 0/1 {
    xtsum happy if treat == `grp'
}

* --- Within variation of the independent variables
xtsum marry age loghhinc woman


********************************************************
***   Panel-robust statistical inference	************
********************************************************

* The same FE model is estimated three times; only the variance estimator
* changes, so the specification is kept in one local macro.
local fe_model happy marry age loghhinc

* (1) conventional S.E.s
xtreg `fe_model', fe
estimates store FE

* (2) panel-robust S.E.s, clustered on the person (alternative: vce(robust))
xtreg `fe_model', fe vce(cluster id)
estimates store FEcluster

* (3) bootstrap S.E.s, because even robust S.E.s are often biased
*     (replicable through the seed set at the top of the file)
xtreg `fe_model', fe vce(bootstrap)
estimates store FEboot

* Coefficients, S.E.s and t-values of the three variants side by side
estimates table FE FEcluster FEboot,          ///
         keep(marry age loghhinc)             ///
         b(%9.4f) se(%9.4f) t(%9.4f)

* Obviously, with over 14,000 clusters asymptotics work well
* Therefore, in the following we will use the cluster S.E.s



**********************************************************************
*******           Step Impact Function                    ************
*******           Comparing BE, POLS, RE, FE and FD       ************
**********************************************************************
* Here it is essential to control for "cohort". Otherwise the marriage 
* effects of BE, POLS, and RE would be heavily biased downwards!

* Between Regression
xtreg   happy marry age loghhinc woman i.cohort, be
est store BE
* Pooled OLS
reg   happy marry age loghhinc woman i.cohort, vce(cluster id)
est store POLS
* Random-Effects Regression (option theta additionally reports theta)
xtreg   happy marry age loghhinc woman i.cohort, re  vce(cluster id) theta
est store RE
* Fixed-Effects Regression
* Stored as "FE" again, i.e. this replaces the FE results stored in the
* section on panel-robust inference.
* NOTE(review): woman is presumably constant within persons and is then
* omitted by the FE estimator -- it is listed only to mirror the other models.
xtreg   happy marry age loghhinc woman, fe  vce(cluster id)
est store FE
* First-Difference Regression
* Here we lose one person-year per person and another one for each gap.
* The gap count compares every row with the row directly above it, so the
* data have to be in id-year order. Sort explicitly instead of relying on
* whatever order the preceding commands left behind (a no-op if the data
* are already sorted this way).
sort id year
count if year - year[_n-1] > 1 & id == id[_n-1]   //number of gaps in the data
regress D.(happy marry age loghhinc woman), noconstant vce(cluster id)
est store FD

* Table of estimation results
estimates table BE POLS RE FE FD, b(%7.2f) star stfmt(%6.0f) stats(N N_clust) ///
                keep(marry loghhinc woman)

* Graphical comparison of the POLS, RE and FE estimates stored above.
* Needs the user-written "coefplot" package (Jann 2014); install once with:
* ssc install coefplot, replace
coefplot (POLS, pstyle(p2))                                             ///
         (RE,   pstyle(p3))                                             ///
         (FE,   pstyle(p4)),                                            ///
         title("Regression Coefficients with 95% CIs")                  ///
         xtitle("Effect on happiness", size(medlarge) margin(0 0 0 2))  ///
         xlabel(-.1(.1).4, grid) xline(0)                               ///
         keep(marry loghhinc woman)                                     ///
         coeflabels(marry="marriage" loghhinc="ln(HHincome)")           ///
         legend(row(1))

*** Hausman Test (unfortunately does not work with panel-robust S.E.s)
* Both models are therefore re-estimated with conventional S.E.s.
* RE2 = random effects (with cohort dummies), FE2 = fixed effects.
xtreg happy marry age loghhinc i.cohort, re
estimates store RE2

xtreg happy marry age loghhinc, fe
estimates store FE2

hausman FE2 RE2, sigmamore



*****************************************************************
*****      Why do we need a control group?                  *****
*****************************************************************

* Each FE model is estimated twice: on the treatment group only (treat==1,
* persons who marry during the panel) and on the full sample, which adds the
* never-treated persons as a control group.

* I) Model includes only marriage
xtreg happy marry if treat==1,  fe      //only those who married
est store CG1
xtreg happy marry            ,  fe      //plus control group
est store CG2

* II) Model adds other time-varying variables
xtreg happy marry age if treat==1,  fe  //only those who married
est store CG3
xtreg happy marry age            ,  fe  //plus control group
est store CG4

* These four models are estimated without vce(cluster), so e(N_clust) is not
* stored and a N_clust row would stay empty. Report e(N_g), the number of
* groups (persons) used by -xtreg, fe-, instead.
estimates table CG1 CG2 CG3 CG4, b(%7.3f) se(%7.4f) stfmt(%6.0f) stats(N N_g) ///
                drop(_cons)

				
*****************************************************************
*****      Optimal age specification?                       *****
*****************************************************************

* Two FE models that differ only in how age enters; estimator and variance
* options are shared.
local fe_opts fe vce(cluster id)

* I) Age as a linear term
xtreg happy marry loghhinc age, `fe_opts'
estimates store AG1

* II) Age as a full set of dummies
xtreg happy marry loghhinc i.age, `fe_opts'
estimates store AG2

* Side-by-side table; the age dummies of AG2 are not listed
estimates table AG1 AG2,                      ///
                keep(marry loghhinc age)      ///
                b(%7.3f) star                 ///
                stats(N N_clust) stfmt(%6.0f)

				
************************************************************
*******           Continuous Impact Function    ************
************************************************************

* FE model with a quadratic impact function: marriage dummy plus a linear and
* a squared term in years since marriage
xtreg happy i.marry c.yrsmarried c.yrsmarried#c.yrsmarried age loghhinc, fe vce(cluster id)

* Simple Effect Plot: all coefficients except the constant
coefplot, drop(_cons)                                                        ///
          title("Regression Coefficients with 95% CIs")                      ///
          xtitle("Effect on happiness", size(medlarge) margin(0 0 0 2))      ///
          xlabel(-.1(.05).3, grid format(%3.2f)) xline(0)                    ///
          coeflabels(1.marry="marriage" age="age" loghhinc="ln(HHincome)")

* Conditional Effect Plot: marginal marriage effect by years since marriage.
* The at() grid crosses marry=0/1 with yrsmarried=0..15; every point is
* contrasted with the first at() point.
margins, at(marry=(0 1) yrsmarried=(0(1)15)) contrast(atcontrast(r._at) lincom) noatlegend

* Plot 1 (marry=0, never-married) is suppressed, plot 2 (marry=1) is drawn
* as a thick blue line with a dashed green confidence band.
marginsplot, x(yrsmarried) recast(line) recastci(rline)              ///
   plot1opts(lstyle(none)) ci1opts(lstyle(none))                     ///
   plot2opts(lpattern(solid) lwidth(thick) lcolor(blue))             ///
   ci2opts(lpattern(dash) lwidth(medthick) lcolor(green))            ///
   yline(0, lcolor(black))                                           ///
   xtitle("Years since marriage", size(large) margin(0 0 0 2))       ///
   xlabel(0(1)15, labsize(medium))                                   ///
   ytitle("Change in happiness", size(large))                        ///
   ylabel(-.3(.1).5, grid angle(0) labsize(medium) format(%3.1f))    ///
   title(" ")                                                        ///
   legend(pos(7) ring(0) row(1) order(2 "95%-CI") size(medlarge))


*** Wald tests of the marriage effect at marriage durations of 0, 5 and 6 years
* Same quadratic FE model, written without the i. prefix on marry so that the
* coefficients can be referred to by their plain names in -lincom-.
xtreg happy marry yrsmarried c.yrsmarried#c.yrsmarried age loghhinc, fe vce(cluster id)

* duration 0: just married
lincom marry

* durations 5 and 6: marry + d*linear term + d^2*squared term
foreach dur of numlist 5 6 {
    local dursq = `dur'^2
    lincom marry+yrsmarried*`dur'+c.yrsmarried#c.yrsmarried*`dursq'
}


/* Plotting AMEs (Effect Plot) [does not make sense here]
margins, dydx(*)
marginsplot, horizontal xline(0) plotopts(connect(i))                ///
        xlabel(-.1(0.05).3, grid labsize(medium) format(%3.2f))      ///
		ytitle(" ") ylabel(, labsize(medlarge)) title(, size(large)) ///
		xtitle("Effect on happiness", size(large) margin(0 0 0 2)) 

* Plotting the predicted values (Profile Plot) [does not make sense with FE]
margins, at(yrsmarried=(0(1)15) marry=1) noatlegend
marginsplot, recast(line) recastci(rline)                           ///
        ylabel(5(1)10, grid angle(0)) xlabel(0(1)15)                ///
        xtitle("Years since marriage", size(large) margin(0 0 0 2)) ///
        ytitle("Happiness", size(large))                            ///
        plotopts(lpattern(solid) lwidth(thick) lcolor(blue))     	///
        ciopts(lpattern(dash) lwidth(medthick) lcolor(green))    
*/		


*******          Comparing POLS, RE and FE      ************
* The models below use the squared marriage duration as a plain variable so
* that _b[yrsmarried2] can be addressed directly. The variable is not created
* anywhere in this do-file; build it here unless the data set already has it.
capture confirm variable yrsmarried2, exact
if _rc {
    gen yrsmarried2 = yrsmarried^2
}

reg   happy marry yrsmarried yrsmarried2 age loghhinc woman i.cohort, vce(cluster id)
est store POLS1
xtreg happy marry yrsmarried yrsmarried2 age loghhinc woman i.cohort, re vce(cluster id)
est store RE1
xtreg happy marry yrsmarried yrsmarried2 age loghhinc, fe vce(cluster id)
est store FE1
* Table of estimation results
estimates table POLS1 RE1 FE1, star b(%9.4f)                              ///
         keep(marry yrsmarried yrsmarried2 age loghhinc woman) 

* Conditional Effect Plot
* Here done "by hand", because we compare the three models in one plot.
* For each model the marriage effect at the observed marriage duration is
* computed from the stored coefficients. The values are filled in only for
* the first 1000 rows, which serve as plotting points.
* NOTE(review): this assumes the first 1000 rows cover the durations 0-15.
estimates restore POLS1
gen POLS=_b[marry]+_b[yrsmarried]*yrsmarried+_b[yrsmarried2]*yrsmarried2 in 1/1000
est restore RE1
gen RE=_b[marry]+_b[yrsmarried]*yrsmarried+_b[yrsmarried2]*yrsmarried2   in 1/1000
est restore FE1
gen FE=_b[marry]+_b[yrsmarried]*yrsmarried+_b[yrsmarried2]*yrsmarried2   in 1/1000
* (the zero reference line is requested once; it used to be listed twice)
line POLS RE FE yrsmarried if yrsmarried<=15, sort                   ///
        lwidth(thick thick thick)                                    ///       
        lcolor(green red blue)   yline(0, lcolor(black))             ///
        ylabel(-.3(.1).5, grid angle(0) labsize(medium) format(%3.1f)) /// 
		xlabel(0(1)15, labsize(medium))                              ///
        legend(pos(7) ring(0) row(1))                                /// 
        xtitle("Years since marriage", size(large) margin(0 0 0 2))  ///
        ytitle("Change in happiness", size(large))                   ///
        title("Conditional effect of marriage estimated by POLS, RE, and FE")


************************************************************
*******                Dummy Impact Function    ************
************************************************************

* "h2" is marriage year
* Event-centered time scale for the treated: h3 = 0 in the marriage year
gen h3 = year - h2 if treat == 1
* Collapse the tails: everything before marriage -> -1, 15+ years -> 15
replace h3 = max(-1, min(h3, 15)) if !missing(h3)
* Shift to non-negative codes (ym = 1 is the marriage year). Code 0 is the
* reference group: the pre-marriage years and everybody who is never treated.
gen ym = cond(missing(h3), 0, h3 + 1)

* FE model with one dummy per year since marriage
xtreg happy i.ym age loghhinc, fe vce(cluster id)

* Dummy coefficients as a line with dashed confidence band;
* the x-axis labels translate the ym codes back to years since marriage
coefplot, keep(*.ym) vertical                                                   ///
          recast(line) lwidth(thick) lcolor(blue)                               ///
          ciopts(recast(rline) lpattern(dash) lwidth(medthick) lcolor(green))   ///
          yline(0)                                                              ///
          xtitle("Years since marriage", size(medlarge) margin(0 0 0 2))        ///
          ytitle("Change in happiness", size(medlarge))                         ///
          ylabel(-.3(.1).5, grid angle(0) labsize(medium) format(%3.1f))        ///
          coeflabels( 1.ym="0"   2.ym="1"   3.ym="2"   4.ym="3"                 ///
                      5.ym="4"   6.ym="5"   7.ym="6"   8.ym="7"                 ///
                      9.ym="8"  10.ym="9"  11.ym="10" 12.ym="11"                ///
                     13.ym="12" 14.ym="13" 15.ym="14" 16.ym="15")


************************************************************
*******                Anticipation Effects     ************
************************************************************

* Anticipation: the year before marriage (T=-1) gets its own dummy
capture drop    h3 ym
* Event-centered time scale for the treated: h3 = 0 in the marriage year
gen h3 = year - h2 if treat == 1
* Collapse the tails: 2+ years before marriage -> -2, 15+ years after -> 15
replace h3 = max(-2, min(h3, 15)) if !missing(h3)
* Shift to non-negative codes (ym = 1 is T=-1, ym = 2 the marriage year).
* Code 0 is the reference group: the earlier pre-marriage years and
* everybody who is never treated.
gen ym = cond(missing(h3), 0, h3 + 2)

xtreg happy i.ym age loghhinc, fe vce(cluster id)

* The vertical line at 1.5 separates T=-1 from the marriage year
coefplot, keep(*.ym) vertical                                                              ///
          recast(line) lwidth(thick) lcolor(blue)                                          ///
          ciopts(recast(rline) lpattern(dash) lwidth(medthick) lcolor(green))              ///
          yline(0) xline(1.5)                                                              ///
          xtitle("Years before / after marriage", size(medlarge) margin(0 0 0 2))          ///
          ytitle("Change in happiness", size(medlarge))                                    ///
          ylabel(-.3(.1).5, grid angle(0) labsize(medium) format(%3.1f))                   ///
          coeflabels( 1.ym="-1"  2.ym="0"   3.ym="1"   4.ym="2"   5.ym="3"                 ///
                      6.ym="4"   7.ym="5"   8.ym="6"   9.ym="7"  10.ym="8"                 ///
                     11.ym="9"  12.ym="10" 13.ym="11" 14.ym="12" 15.ym="13"                ///
                     16.ym="14" 17.ym="15")

* Anticipation: the six years before marriage (T=-6 ... T=-1) get own dummies
capture drop    h3 ym
* Event-centered time scale for the treated: h3 = 0 in the marriage year
gen h3 = year - h2 if treat == 1
* Collapse the tails: 7+ years before marriage -> -7, 15+ years after -> 15
replace h3 = max(-7, min(h3, 15)) if !missing(h3)
* Shift to non-negative codes (ym = 1 is T=-6, ym = 7 the marriage year).
* Code 0 is the reference group: the earlier pre-marriage years and
* everybody who is never treated.
gen ym = cond(missing(h3), 0, h3 + 7)

xtreg happy i.ym age loghhinc, fe vce(cluster id)

* The vertical line at 6.5 separates T=-1 from the marriage year
coefplot, keep(*.ym) vertical                                                              ///
          recast(line) lwidth(thick) lcolor(blue)                                          ///
          ciopts(recast(rline) lpattern(dash) lwidth(medthick) lcolor(green))              ///
          yline(0) xline(6.5)                                                              ///
          xtitle("Years before / after marriage", size(medlarge) margin(0 0 0 2))          ///
          ytitle("Change in happiness", size(medlarge))                                    ///
          ylabel(-.1(.1).7, grid angle(0) labsize(medium) format(%3.1f))                   ///
          coeflabels( 1.ym="-6"  2.ym="-5"  3.ym="-4"  4.ym="-3"  5.ym="-2"  6.ym="-1"     ///
                      7.ym="0"   8.ym="1"   9.ym="2"  10.ym="3"  11.ym="4"  12.ym="5"      ///
                     13.ym="6"  14.ym="7"  15.ym="8"  16.ym="9"  17.ym="10"                ///
                     18.ym="11" 19.ym="12" 20.ym="13" 21.ym="14" 22.ym="15")


/* This is the distributed FE with Lags and Leads (I am not quite sure what this model does)
gen     mevent=0
replace mevent=1 if year==h2   //marriage event dummy (only pyr immediately after marriage)
xtreg happy mevent L(1/10).mevent F(1/6).mevent age loghhinc, fe vce(cluster id)
*/


**********************************************************
*** Investigating the income-effect in more detail    ****
**********************************************************

* Continuous linear modelling
gen hhincz = hhinc / 10000				 // HHincome in 10000
xtreg   happy marry age hhincz, fe  vce(cluster id)

* Categorical modelling
summ hhinc, detail
* Group HHincome into 8 brackets; with -icodes- the brackets are coded 0..7.
* Values at or above the last cut point (2,000,000) are set to missing.
egen inc = cut(hhinc),                      ///group HHincome
     at(0,10000,20000,30000,40000,50000,60000,100000,2000000) icodes
* Verify the grouping: count, min, max and mean of hhinc per bracket.
* -tabstat- is used instead of -table, contents()- because the contents()
* option is not accepted by the -table- command of Stata 17 and later.
tabstat hhinc, by(inc) statistics(n min max mean) nototal
xtreg   happy marry age i.inc, fe  vce(cluster id)
* The two wide top brackets are moved further out on the axis via relocate().
* "medium" replaces "med", which is not one of Stata's named text sizes.
coefplot, xline(0) keep(*.inc) base                                      ///
          coeflabels(0.inc="0-10 (ref.)" 1.inc="10-20" 2.inc="20-30"     ///
		             3.inc="30-40" 4.inc="40-50" 5.inc="50-60"           ///
					 6.inc="60-100" 7.inc="100 +")                        ///
		  relocate(6.inc=8    7.inc=11)                                  ///
          xlabel(0(.1).5, grid)                                          ///
          xtitle("Effect of income on happiness", size(medlarge) margin(0 0 0 2)) ///
		  ytitle("Gross annual household income (in 10,000 EUR)", size(medium))


		
************************************************************
*******           Interaction Effects           ************
************************************************************
		
********************************
* Sex specific income effect
********************************

* (a) Standard interaction: main effects of woman and loghhinc plus their product
xtreg happy marry i.woman##c.loghhinc age, fe vce(cluster id)

* (b) Nested effects: a separate loghhinc slope for each sex
xtreg happy marry i.woman#c.loghhinc age, fe vce(cluster id)

* Conditional effect plot: income effect by sex, based on model (b)
margins, dydx(loghhinc) over(woman) noatlegend
marginsplot, horizontal                                                          ///
        title("Conditional Effect Plot by Sex", size(large))                     ///
        plotopts(connect(i))                                                     ///
        plot1opts(msize(large) mcolor(red)) ci1opts(lwidth(thick) lcolor(blue))  ///
        xline(0)                                                                 ///
        xtitle("Effect of 'HHincome' on happiness", size(large) margin(0 0 0 2)) ///
        xlabel(0(0.05).2, grid labsize(medium) format(%3.2f))                    ///
        ytitle(" ")                                                              ///
        ylabel(0 "Men" 1 "Women", labsize(medlarge)) ysc(r(-0.5 1.5))

* (c) Full interaction gives same results as separate regressions!
xtreg happy i.woman#i.marry i.woman#c.loghhinc i.woman#c.age, fe vce(cluster id)
* ... the separate regressions: men (woman==0), then women (woman==1)
forvalues sex = 0/1 {
    xtreg happy marry loghhinc age if woman == `sex', fe vce(cluster id)
}


		
**************************************
* Sex specific marriage effect
**************************************

* Standard interaction specification
* (## adds the main effects as well as the interactions with woman, so the
*  woman interactions are differences relative to the other sex)
xtreg happy i.woman##(i.marry c.yrsmarried##c.yrsmarried) loghhinc age, fe vce(cluster id)

* Nested effects specification
* (# adds only the interactions, which gives a separate marriage dummy,
*  linear and squared duration coefficient for woman==0 and for woman==1;
*  the hand-made plot further below reads exactly these coefficients, so
*  this must stay the last model estimated before it)
xtreg happy i.woman#(i.marry c.yrsmarried##c.yrsmarried) loghhinc age, fe vce(cluster id)

* Are the time paths significantly different?
* Joint Wald test that all three coefficients of the marriage path (dummy,
* linear and squared duration) are equal for woman==0 and woman==1.
test (1.marry                     # 0.woman = 1.marry                     # 1.woman) ///
     (c.yrsmarried                # 0.woman = c.yrsmarried                # 1.woman) ///
     (c.yrsmarried # c.yrsmarried # 0.woman = c.yrsmarried # c.yrsmarried # 1.woman)

* Plotting the marginal marriage effects by sex (Conditional Effect Plot)
* The plot with margins does not work??
/* margins, at(marry=(0 1) yrsmarried=(0(1)15)) contrast(atcontrast(r._at) lincom) over(woman)
marginsplot, recast(line) recastci(rline) yline(0, lcolor(black)) x(yrsmarried)  */

* Therefore, we use a plot "made by hand".
* It uses the coefficients of the nested-effects model estimated directly
* before this block. For every marriage duration y = 0..20 the effect
*     marriage dummy + y*linear term + y^2*squared term
* is computed with -lincom-, separately for men (suffix m, woman==0) and
* women (suffix w, woman==1). Estimate and S.E. for duration y are stored in
* row y+1 of the data set, which is only used as a container here.

* Empty containers: estimates (bm, bw) and standard errors (sesm, sesw).
* (-foreach- replaces the obsolete -for- command used previously.)
foreach v in bm sesm bw sesw {
	generate `v' = .
}

foreach g in m w {
	local sex = ("`g'" == "w")      // 0 = men, 1 = women
	forvalues y=0/20 {
		quietly lincom 1.marry#`sex'.woman + c.yrsmarried#`sex'.woman*`y' + c.yrsmarried#c.yrsmarried#`sex'.woman*`y'*`y'
		quietly replace b`g'   = r(estimate)   if _n==`y'+1
		quietly replace ses`g' = r(se)         if _n==`y'+1
	}
	generate upper`g'=b`g'+1.96*ses`g'    // upper CI
	generate lower`g'=b`g'-1.96*ses`g'    // lower CI
}

generate md    = _n-1           // artificial time axis starting at 0 (for plotting)

* Effect paths for the first 15 years: women in red, men in blue
line bw bm md if md<=15, sort           ///
        lpattern(solid solid)                   /// 
        lwidth(thick thick)             ///       
        lcolor(red blue)                          ///
        ylabel(-.3(.1).5, grid angle(0) labsize(medium) format(%3.1f))   /// 
		xlabel(0(1)15, labsize(medium))                                  ///
        yline(0, lcolor(black))                                          ///
        legend(pos(7) ring(0) row(2) order(1 2) lab(1 "Women")           ///
		     lab(2 "Men") size(medlarge))                                /// 
        xtitle("Years since marriage", size(large) margin(0 0 0 2))      ///
        ytitle("Effect of 'marriage' on happiness", size(large))         ///
        title("Conditional Effect Plot by Sex")

**************************************
* Marriage effect varies by HHincome
**************************************

* HHincome in units of 10,000, top-coded at 5
* (50 Tsd. seems to be a satisfaction threshold)
gen hhinct = min(hhinc / 10000, 5) if !missing(hhinc)

* Standard interaction specification: marriage effect may vary with income
xtreg happy i.marry##c.hhinct age, fe vce(cluster id)

* Marriage effect evaluated at incomes of 0, 10,000, ..., 50,000
margins, dydx(marry) at(hhinct=(0(1)5)) noatlegend

* Thick blue line for the effect, dashed green lines for its confidence band
marginsplot, recast(line) recastci(rline)                               ///
   title("Conditional Effect Plot by HHincome", size(large))            ///
   plotopts(lpattern(solid) lwidth(thick) lcolor(blue))                 ///
   ciopts(lpattern(dash) lwidth(medthick) lcolor(green))                ///
   yline(0, lcolor(red))                                                ///
   xtitle("HHincome (in 10,000 EUR)", size(medlarge) margin(0 0 0 2))   ///
   xlabel(0(1)5, labsize(medium))                                       ///
   ytitle("Effect of 'marriage' on happiness", size(medlarge))          ///
   ylabel(-.1(.05).4, grid angle(0) labsize(medium) format(%3.2f))

